# Required packages
import numpy as np
import pandas as pd
import pycountry
import pycountry_convert
import re
# Visualisation libraries
## progressbar
import progressbar
## Text
from colorama import Fore, Back, Style
from IPython.display import Image, display, Markdown, Latex, clear_output
## plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
## seaborn
import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("paper", rc={"font.size":12,"axes.titlesize":14,"axes.labelsize":12})
## matplotlib
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import matplotlib.colors as mcolors
from matplotlib.patches import Ellipse, Polygon
import matplotlib.gridspec as gridspec
from pylab import rcParams
plt.style.use('seaborn-whitegrid')
import matplotlib as mpl
mpl.rcParams['figure.figsize'] = (17, 6)
mpl.rcParams['axes.labelsize'] = 14
mpl.rcParams['xtick.labelsize'] = 12
mpl.rcParams['ytick.labelsize'] = 12
mpl.rcParams['text.color'] = 'k'
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
In this article, we visualize the data from the Kaggle surveys of three consecutive years (2017, 2018, and 2019). The results include raw numbers about who is working with data, what’s happening with machine learning in different industries, and the best ways for new data scientists to break into the field. We've published the data in as raw a format as possible without compromising anonymization, which makes it an unusual example of a survey dataset.
# Load the multiple-choice responses of each survey year into its own DataFrame.
# NOTE(review): Data20 is read from the 2019 file, i.e. it is a duplicate of Data19 —
# confirm whether a 2020 file was intended before trusting any "2020" figure below.
Data20 = pd.read_csv('kaggle-survey/kaggle-survey-2019/multiple_choice_responses.csv', header=1)
# header=1: the second row of the file is used for the column names.
Data19 = pd.read_csv('kaggle-survey/kaggle-survey-2019/multiple_choice_responses.csv', header=1)
Data18 = pd.read_csv('kaggle-survey/kaggle-survey-2018/multipleChoiceResponses.csv', header=1)
# The 2017 file uses the default header row and is decoded as ISO-8859-1.
Data17 = pd.read_csv('kaggle-survey/kaggle-survey-2017/multipleChoiceResponses.csv',encoding='ISO-8859-1')
def Header(Text, L = 100, C = 'Blue', T = 'White'):
    """Print ``Text`` as a coloured banner followed by a rule of '=' characters.

    Text : the banner text.
    L    : total width; the rule is ``L - len(Text) - 1`` characters long.
    C    : colour name used for the banner background and for the rule.
    T    : colour name used for the banner text.

    Raises KeyError when ``C`` or ``T`` is not one of the supported colour names.
    """
    back_names = ('Black', 'Red', 'Green', 'Yellow', 'Blue', 'Magenta', 'Cyan')
    fore_names = back_names + ('White',)
    backgrounds = {name: getattr(Back, name.upper()) for name in back_names}
    foregrounds = {name: getattr(Fore, name.upper()) for name in fore_names}

    banner = backgrounds[C] + foregrounds[T] + Style.NORMAL + Text + Style.RESET_ALL
    rule = foregrounds[C] + Style.NORMAL + (L - len(Text) - 1) * '=' + Style.RESET_ALL
    print(banner + ' ' + rule)
def Line(L=100, C = 'Blue'):
    """Print a rule of ``L`` '=' characters in the foreground colour named ``C``.

    Raises KeyError when ``C`` is not one of the supported colour names.
    """
    names = ('Black', 'Red', 'Green', 'Yellow', 'Blue', 'Magenta', 'Cyan', 'White')
    foregrounds = {name: getattr(Fore, name.upper()) for name in names}
    rule = L * '='
    print(foregrounds[C] + Style.NORMAL + rule + Style.RESET_ALL)
def Search_List(Key, List):
    """Return the items of ``List`` that contain ``Key``, in their original order."""
    matches = []
    for item in List:
        if Key in item:
            matches.append(item)
    return matches
def Search_df(df, key):
    """Return the column labels of ``df`` that contain ``key``, in column order."""
    return list(filter(lambda label: key in label, df.columns.tolist()))
Renaming Columns
def Rename_func(df):
    """Return a copy of ``df`` whose survey-question columns carry short names.

    Both the long question texts and the short 2017-style names
    ('GenderSelect', 'CurrentJobTitleSelect') are mapped, so the same mapping
    can be applied to every survey year. Columns not listed are left as they are.
    """
    short_names = {
        'In which country do you currently reside?': 'Country',
        'What is your gender? - Selected Choice': 'Gender',
        'GenderSelect': 'Gender',
        'What is your age (# years)?': 'Age Group',
        'What is the highest level of formal education that you have attained or plan to attain within the next 2 years?': 'FormalEducation',
        'Select the title most similar to your current role (or most recent title if retired): - Selected Choice': 'CurrentJobTitle',
        'CurrentJobTitleSelect': 'CurrentJobTitle',
        'What is your current yearly compensation (approximate $USD)?': 'CurrentSalary',
        'What is the size of the company where you are employed?': 'CompanySize',
        'Approximately how many individuals are responsible for data science workloads at your place of business?': 'DataScienceTeamSize',
    }
    renamed = df.rename(columns = short_names)
    return renamed
# Give every survey year the same short column names, then discard the helper.
Data17 = Rename_func(Data17)
Data18 = Rename_func(Data18)
Data19 = Rename_func(Data19)
Data20 = Rename_func(Data20)
del Rename_func
Dropping Columns
# Free-text ("- Text") companion columns that are removed from the 2018-2020 frames.
# Data17 is not touched here.
Cols = {'What is your gender? - Prefer to self-describe - Text',
'Select the title most similar to your current role (or most recent title if retired): - Other - Text',
'Select any activities that make up an important part of your role at work: (Select all that apply) - Other - Text'}
Data18.drop(columns = Cols, inplace = True)
Data19.drop(columns = Cols, inplace = True)
Data20.drop(columns = Cols, inplace = True)
# All Data19 column names, and the subset whose name contains 'Select'.
Columns = Data19.columns.tolist()
Select_Cols = Search_List('Select', Columns)
def Countries_func(Col):
    """Return ``Col`` with long or year-specific country labels replaced by short ones.

    Values that are not keys of the mapping are returned unchanged.
    """
    short_labels = {
        'United States of America': 'United States',
        'Viet Nam': 'Vietnam',
        "People 's Republic of China": 'China',
        'Republic of China': 'China',
        "United Kingdom of Great Britain and Northern Ireland": 'United Kingdom',
        "Hong Kong (S.A.R.)": 'Hong Kong',
        'Republic of Korea': 'South Korea',
        'Iran, Islamic Republic of...': 'Iran',
        'I do not wish to disclose my location': 'Other',
    }
    return Col.replace(short_labels)
# Normalise the country labels of every survey year, then discard the helper.
Data17['Country'] = Countries_func(Data17['Country'])
Data18['Country'] = Countries_func(Data18['Country'])
Data19['Country'] = Countries_func(Data19['Country'])
Data20['Country'] = Countries_func(Data20['Country'])
del Countries_func
# Gender answers that are collapsed into a single 'Other' category.
# np.nan is in the list, so missing answers are relabelled 'Other' as well.
Temp = ['Prefer to self-describe', 'Prefer not to say','Non-binary, genderqueer, or gender non-conforming',
'A different identity', np.nan]
Data17.loc[Data17.Gender.isin(Temp), 'Gender'] = 'Other'
Data18.loc[Data18.Gender.isin(Temp), 'Gender'] = 'Other'
Data19.loc[Data19.Gender.isin(Temp), 'Gender'] = 'Other'
Data20.loc[Data20.Gender.isin(Temp), 'Gender'] = 'Other'
del Temp
def Coutry_Continent(x):
    """Return the continent name for country name ``x``, or NaN when it cannot be resolved.

    The lookup goes country name -> alpha-2 code -> continent code -> continent
    name through ``pycountry_convert``. Any lookup failure (for example an
    unknown name such as 'Other', or a missing value) yields ``np.nan``.
    """
    try:
        alpha2 = pycountry_convert.country_name_to_country_alpha2(x, cn_name_format="default")
        continent_code = pycountry_convert.country_alpha2_to_continent_code(alpha2)
        return pycountry_convert.convert_continent_code_to_continent_name(continent_code)
    except Exception:
        # Deliberately best-effort, but no longer a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not swallowed during a long apply().
        return np.nan
# Derive a 'Continent' column from 'Country' for every survey year, then discard the helper.
# Countries the helper cannot resolve end up as NaN.
Data17['Continent'] = Data17.Country.apply(lambda x: Coutry_Continent(x))
Data18['Continent'] = Data18.Country.apply(lambda x: Coutry_Continent(x))
Data19['Continent'] = Data19.Country.apply(lambda x: Coutry_Continent(x))
Data20['Continent'] = Data20.Country.apply(lambda x: Coutry_Continent(x))
del Coutry_Continent
def Age_Group(x):
    """Return the age-bracket label for age ``x``.

    Brackets have inclusive bounds; ages of 70 and above map to '70+'.
    Anything that falls in no bracket (below 18, between two brackets, NaN)
    yields ``np.nan``.
    """
    brackets = (
        (18, 21, '18-21'), (22, 24, '22-24'), (25, 29, '25-29'), (30, 34, '30-34'),
        (35, 39, '35-39'), (40, 44, '40-44'), (45, 49, '45-49'), (50, 54, '50-54'),
        (55, 59, '55-59'), (60, 69, '60-69'),
    )
    for low, high, label in brackets:
        if low <= x <= high:
            return label
    if 70 <= x:
        return '70+'
    return np.nan
# 2017: bucket the 'Age' column into bracket labels (presumably numeric ages — TODO confirm).
Data17['Age Group'] = Data17['Age'].apply(lambda x: Age_Group(x))
# 2018: fold the '70-79' and '80+' brackets into a single '70+' bracket.
# Data19 and Data20 are left unchanged here.
Data18['Age Group'] = Data18['Age Group'].replace({'70-79':'70+', '80+':'70+'})
del Age_Group
def Education_func(Col):
    """Return ``Col`` with education labels mapped onto one common spelling.

    Straight-apostrophe degree labels are rewritten with the typographic
    apostrophe (’), and the long "did not complete" wording is shortened.
    Values that are not keys of the mapping are returned unchanged.
    """
    common_labels = {
        'I did not complete any formal education past high school': 'No formal education past high school',
        "Bachelor's degree": 'Bachelor’s degree',
        "Master's degree": 'Master’s degree',
        "Some college/university study without earning a bachelor's degree":
            'Some college/university study without earning a bachelor’s degree',
    }
    return Col.replace(common_labels)
# Bring the education labels of every survey year onto the same spelling.
Data17['FormalEducation'] = Education_func(Data17['FormalEducation'])
Data18['FormalEducation'] = Education_func(Data18['FormalEducation'])
Data19['FormalEducation'] = Education_func(Data19['FormalEducation'])
Data20['FormalEducation'] = Education_func(Data20['FormalEducation'])
# Treat "do not wish to disclose" salary answers as missing.
# The result is assigned back to the column: an in-place replace on ``df.CurrentSalary``
# is a chained update that may act on a temporary copy, and warnings are silenced
# in this notebook, so a failure to write back would go unnoticed.
Data18['CurrentSalary'] = Data18['CurrentSalary'].replace({'I do not wish to disclose my approximate yearly compensation':np.nan})
Data19['CurrentSalary'] = Data19['CurrentSalary'].replace({'I do not wish to disclose my approximate yearly compensation':np.nan})
Data20['CurrentSalary'] = Data20['CurrentSalary'].replace({'I do not wish to disclose my approximate yearly compensation':np.nan})
def FeatDispPlot(Table, PD):
    """Show a bar chart (left) and a pie/donut chart (right) of the same two columns.

    Table : DataFrame holding the columns named by ``PD['xFeat']`` and
            ``PD['yFeat']`` plus a 'Percentage' column (used as bar hover text).
    PD    : dict of plot settings. Keys read here: 'xFeat', 'yFeat', 'Colors',
            'line_color', 'line_width', 'yLim', 'hole', 'pull', 'Title'.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    x_values = Table[PD['xFeat']].values
    y_values = Table[PD['yFeat']].values
    fig = make_subplots(rows=1, cols=2, vertical_spacing = 0.05, specs=[[{"type": "xy"}, {'type':'domain'}]])
    # Left: bar chart
    fig.add_trace(go.Bar(x= x_values, y= y_values,
                         marker_color= PD['Colors'], textposition='inside', showlegend = False,
                         hovertext=list(Table['Percentage'])), 1, 1)
    fig.update_traces(marker_line_color= PD['line_color'], marker_line_width= PD['line_width'],
                      opacity=1, row=1, col=1)
    fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                     zeroline=False, zerolinewidth=1, zerolinecolor='Black',
                     showgrid=False, gridwidth=1, gridcolor='Lightgray')
    fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                     zeroline=True, zerolinewidth=1, zerolinecolor='Black',
                     showgrid=True, gridwidth=1, gridcolor='Lightgray', range= PD['yLim'])
    # Right: pie chart of the same values
    fig.add_trace(go.Pie(labels= x_values, values= y_values,
                         textfont=dict(size=16), marker=dict(colors = PD['Colors'],
                                                            line=dict(color='black', width=1))), 1, 2)
    fig.update_traces(hole=PD['hole'], marker_line_color=PD['line_color'], marker_line_width= PD['line_width'],
                      pull=PD['pull'], opacity=1, row=1, col=2)
    # The bold tag is closed properly (the title previously ended with a second opening <b>).
    fig.update_layout(plot_bgcolor= 'white', width = 980,
                      title={'text': '<b>' + PD['Title'] + '</b>', 'x':0.5, 'y':0.90, 'xanchor': 'center', 'yanchor': 'top'})
    fig.show()
# Number of responses (row count of each frame) per survey year, with each year's share in percent.
# pull has one entry per year; only the last slice (2020) is pulled out of the pie.
PD = dict(xFeat = 'Year', yFeat = 'Responses', line_color = 'Navy', line_width = 1.25, Title = 'Responses', yLim = [0, 25e3],
hole = .4, pull = [0, 0, 0, 0.1], Colors = ['LightCoral','GreenYellow','LightSkyBlue','BlueViolet'])
Table = pd.DataFrame({'Year':[2017, 2018, 2019, 2020],
'Responses':[Data17.shape[0], Data18.shape[0], Data19.shape[0], Data20.shape[0]]})
Table['Percentage'] = np.round(100* Table['Responses'].values /Table['Responses'].sum(), 2)
FeatDispPlot(Table, PD)
A quick comparison between the number of responses by year shows that the number of responses in 2018 is the highest.
def TableAgg(Feat, data = [Data17, Data18, Data19, Data20], Lables = ['2017', '2018', '2019', '2020']):
    """Count rows per combination of the ``Feat`` columns, separately for each frame.

    Feat   : list of column names to group by.
    data   : frames to aggregate. The default list is bound when the function
             is defined and is never mutated here.
    Lables : one label per frame, written to the 'Year' column (paired via zip,
             so any surplus entries on either side are ignored).

    Returns one long DataFrame with the ``Feat`` columns, 'Count' and 'Year'.
    Missing values are filled with 0 after the index is reset.
    """
    per_year = []
    for df, year in zip(data, Lables):
        if len(Feat)>1:
            counts = df.groupby(Feat)[Feat[0]].count().to_frame('Count')
        else:
            counts = df.groupby(Feat)[Feat].count().rename(columns = {Feat[0]:'Count'})
        counts['Year'] = year
        per_year.append(counts)
    # Concatenate once instead of growing a frame inside the loop: this avoids
    # re-copying earlier years on every pass and concatenating onto an empty frame.
    Table = pd.concat(per_year, axis = 0) if per_year else pd.DataFrame()
    return Table.reset_index(drop = False).fillna(0)
def YearBarPlot(Table, PD, xy = True):
    """Show a grouped bar chart of ``PD['yFeat']`` per ``PD['xFeat']``, coloured by ``PD['ColorFeat']``.

    Table : DataFrame with the columns named in ``PD``; all columns are added to the hover data.
    PD    : dict of plot settings. Keys read here: 'xFeat', 'yFeat', 'ColorFeat',
            'Colors', 'Lims', 'line_color', 'height', 'Title', 'titleY'.
            ('line_width' is not read; the bar outline width is fixed at 1.)
    xy    : True draws vertical bars, False draws horizontal bars
            (categories on the y axis, values on the x axis).

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    # Axis carrying the categories: framed, no grid, no zero line.
    category_axis = dict(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                         zeroline=False, zerolinewidth=1, zerolinecolor='Black',
                         showgrid=False, gridwidth=1, gridcolor='Lightgray')
    # Axis carrying the values: framed, with grid, zero line and a fixed range.
    value_axis = dict(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                      zeroline=True, zerolinewidth=1, zerolinecolor='Black',
                      showgrid=True, gridwidth=1, gridcolor='Lightgray', range= PD['Lims'])
    shared = dict(color = PD['ColorFeat'], text = PD['yFeat'], color_discrete_sequence = PD['Colors'],
                  hover_data= Table.columns, barmode='group')
    if xy:
        fig = px.bar(Table, x= PD['xFeat'], y= PD['yFeat'], orientation='v', **shared)
        fig.update_xaxes(**category_axis)
        fig.update_yaxes(**value_axis)
    else:
        fig = px.bar(Table, y= PD['xFeat'], x= PD['yFeat'], orientation='h', **shared)
        fig.update_yaxes(**category_axis)
        fig.update_xaxes(**value_axis)
    fig.update_traces(marker_line_color= PD['line_color'], marker_line_width=1, opacity=1)
    # The bold tag is closed properly (the title previously ended with a second opening <b>).
    fig.update_layout(legend_orientation='v', legend_title_text=PD['ColorFeat'],
                      plot_bgcolor= 'white', height= PD['height'], width= 980,
                      title={'text': '<b>' + PD['Title'] + '</b>', 'x':0.5, 'y':PD['titleY'],
                             'xanchor': 'center', 'yanchor': 'top'})
    fig.show()
Top = 15
Table = TableAgg(Feat = ['Country'])
# Countries ordered by their largest per-year count; reused by later cells as ``Ind``.
Ind = Table.sort_values(by=['Count'], ascending=False)['Country'].unique().tolist()
# NOTE(review): only three colours are listed while TableAgg yields four year labels by default.
PD = dict(xFeat = 'Country', yFeat = 'Count', line_color = 'Navy', ColorFeat = 'Year', line_width = 1.25,
          Title = 'Responses by Countries (Top %i)' % Top,
          Lims = [0, 5e3], Colors = ['Bisque','LightGreen','CornFlowerBlue'], titleY = 0.95, height = 500)
YearBarPlot(Table.loc[Table['Country'].isin(Ind[:Top])], PD)
# World map: 'Other' has no country code, so it is removed first.
# .copy() makes the filtered frame independent before a new column is added to it.
Table = Table.loc[~Table.Country.isin(['Other'])].copy()
Table['alpha3'] = Table.Country.apply(lambda x: pycountry_convert.country_name_to_country_alpha3(x, cn_name_format="default"))
fig = px.choropleth(Table, locations= 'alpha3', color="Count", hover_name="Country",
                    animation_frame="Year", range_color=[0,5e3], color_continuous_scale= "Jet")
# The bold tag is closed properly (the title previously ended with a second opening <b>).
fig.update_layout(plot_bgcolor= 'white', width = 980,
                  title={'text': '<b>' + 'Responses by Countries' + '</b>', 'x':0.5, 'y':PD['titleY'],
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
It can be seen that each year, the highest number of responses are from India and the United States.
def YearPiePlot(Table, PD):
    """Show one pie/donut chart per year, side by side.

    Table : long DataFrame with 'Year', 'Count' and the column named by ``PD['Feat']``.
    PD    : dict of plot settings. Keys read here: 'Feat', 'Colors', 'hole',
            'line_color', 'line_width', 'height', 'Title', 'titleY',
            'legend_x', 'legend_y'.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    Years = np.sort(Table['Year'].unique())
    # Bold tags are closed properly (titles previously ended with a second opening <b>).
    fig = make_subplots(rows=1, cols=len(Years), specs=[[{'type':'domain'}]*len(Years)],
                        subplot_titles=['<b>%s</b>' % x for x in Years])
    for position, year in enumerate(Years, start=1):
        of_year = Table['Year'] == year
        # Slice colours are given once through ``marker``; the duplicate
        # ``marker_colors`` argument carried the same value and is dropped.
        fig.add_trace(go.Pie(labels=Table.loc[of_year, PD['Feat']].values,
                             values=Table.loc[of_year, 'Count'].values, name= year,
                             textfont=dict(size=16),
                             marker=dict(colors = PD['Colors'], line=dict(color='black', width=1))), 1, position)
    fig.update_traces(hole=PD['hole'], marker_line_color=PD['line_color'], marker_line_width= PD['line_width'], opacity=1)
    fig.update_layout(plot_bgcolor= 'white', height= PD['height'], width= 980,
                      title={'text': '<b>' + PD['Title'] + '</b>', 'x':0.5, 'y':PD['titleY'],
                             'xanchor': 'center', 'yanchor': 'top'})
    fig.update_layout(legend_title_text=PD['Feat'], legend=dict(orientation="h", x=PD['legend_x'], y=PD['legend_y']))
    fig.show()
# Share of each gender category per survey year, one donut chart per year.
Feat = 'Gender'
Table = TableAgg([Feat])
PD = dict(Feat = 'Gender', line_color = 'Black', ColorFeat = 'Year', line_width = 1, hole = .5, titleY = .95,
Title = 'Responses by %s for various years' % Feat,
Colors = ['Tomato','GreenYellow','LightSkyBlue'], height = 400,
legend_x = .32, legend_y = -.1)
YearPiePlot(Table, PD)
It can be seen that each year, the majority of the participants are men. This graph can be broken down by country as follows.
def TwoFeaturePlot(Table, Feat1, Feat2, PD, Ind = Ind):
    """Show one stacked percentage bar chart per year: ``Feat1`` on the x axis, split by ``Feat2``.

    Table : long DataFrame with 'Year', 'Percentage', ``Feat1`` and ``Feat2`` columns.
    Feat1 : column shown on the x axis.
    Feat2 : column used to colour/stack the bars.
    PD    : dict of plot settings. Keys read here: 'Colors', 'line_color',
            'line_width', 'ColorFeat', 'height', 'Title', 'titleY'.
    Ind   : not used by the body; kept so existing calls keep working.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    Years = Table['Year'].unique().tolist()
    # Bold tags are closed properly (titles previously ended with a second opening <b>).
    fig = make_subplots(rows=1, cols=len(Years),
                        subplot_titles=['<b>%s</b>' % x for x in Years])
    for position, year in enumerate(Years, start=1):
        of_year = Table.loc[(Table['Year'] == year)]
        # Build the stacked bars with plotly express, then move its traces into the subplot.
        fig0 = px.bar(of_year, x= Feat1, y= 'Percentage', color = Feat2, text = 'Percentage',
                      barmode='stack', color_discrete_sequence = PD['Colors'])
        for trace in fig0['data']:
            fig.add_trace(trace, row=1, col= position)
        if position == 1:
            # Only the first panel carries the axis title and the legend entries.
            fig.update_yaxes(title = 'Percentage', row=1, col=position)
        else:
            fig.update_traces(showlegend=False, row=1, col=position)
    fig.update_layout(barmode='relative')
    fig.update_traces(marker_line_color=PD['line_color'], marker_line_width= PD['line_width'], opacity=1)
    fig.update_layout(legend_orientation='v', legend_title_text=PD['ColorFeat'],
                      plot_bgcolor= 'white', height= PD['height'], width= 980,
                      title={'text': '<b>' + PD['Title'] + '</b>', 'x':0.5, 'y':PD['titleY'],
                             'xanchor': 'center', 'yanchor': 'top'})
    fig.show()
Top = 5
Feat1 = 'Country'
Feat2 = 'Gender'
Table = TableAgg(Feat = [Feat1, Feat2])
# Percentage of each Feat2 category within its (Feat1, Year) group, rounded to 2 decimals.
# One grouped transform replaces the nested loop over every country/year pair.
Table['Percentage'] = np.round(
    100 * (Table['Count'] / Table.groupby([Feat1, 'Year'])['Count'].transform('sum')), 2)
PD = dict(line_color = 'Black', ColorFeat = 'Year', line_width = 1, titleY = .90,
          Title = 'Responses by %s and %s for various years' % (Feat1, Feat2),
          Colors = ['Tomato','GreenYellow','LightSkyBlue'], height = 550)
# Keep only the first ``Top`` entries of the ranking stored in ``Ind``.
Table = Table.loc[Table[Feat1].isin(Ind[:Top])]
TwoFeaturePlot(Table, Feat1, Feat2, PD)
The number and percentage of the participants can be analyzed by continent as well.
# Share of each continent per survey year, one donut chart per year.
Feat = 'Continent'
Table = TableAgg([Feat])
PD = dict(Feat = 'Continent', line_color = 'Black', ColorFeat = 'Year', line_width = 1, hole = .5, titleY = .95,
Title = 'Responses by %s for various years' % Feat,
Colors = ['deepskyblue','GreenYellow','OrangeRed', 'violet','LimeGreen','Olive'],
height = 500,legend_x = .09, legend_y = -.01)
YearPiePlot(Table, PD)
## Gender share within each continent, per year
Top = 10
Feat1 = 'Continent'
Feat2 = 'Gender'
Table = TableAgg(Feat = [Feat1, Feat2])
# Percentage of each Feat2 category within its (Feat1, Year) group, rounded to 2 decimals.
# One grouped transform replaces the nested loop over every continent/year pair.
Table['Percentage'] = np.round(
    100 * (Table['Count'] / Table.groupby([Feat1, 'Year'])['Count'].transform('sum')), 2)
PD = dict(line_color = 'Black', ColorFeat = 'Year', line_width = 1, titleY = .90,
          Title = 'Responses by %s and %s for various years' % (Feat1, Feat2),
          Colors = ['Tomato','GreenYellow','LightSkyBlue'], height = 450)
TwoFeaturePlot(Table, Feat1, Feat2, PD)
Top = 10
Table = TableAgg(Feat = ['Continent', 'Country'])
# One colour per survey year; later cells slice this list.
year_colors = ['Bisque','LightGreen','CornFlowerBlue', 'Purple']
# One panel per continent: (continent, grid row, grid column, y-axis maximum, hide legend entries).
# Europe keeps its legend entries so the year legend is shown once.
panels = [('Asia', 1, 1, 5000, True),
          ('North America', 1, 4, 5000, True),
          ('Europe', 2, 1, 1000, False),
          ('Africa', 3, 1, 500, True),
          ('South America', 3, 3, 800, True)]
# Bold tags are closed properly (titles previously ended with a second opening <b>).
fig = make_subplots(rows=3, cols=4, vertical_spacing = 0.08,
                    specs=[[{"colspan": 3}, None, None,{"colspan": 1}],
                           [{"colspan": 4}, None, None, None],
                           [{"colspan": 2}, None,{"colspan": 2}, None]],
                    subplot_titles=['<b>%s</b>' % panel[0] for panel in panels])
for panel_name, panel_row, panel_col, panel_ymax, hide_legend in panels:
    # Filter on 'Continent' explicitly instead of on whatever Feat1 currently holds.
    fig0 = px.bar(Table.loc[Table['Continent'] == panel_name], x= 'Country', y= 'Count', color = 'Year', text = 'Count',
                  barmode='group', color_discrete_sequence = year_colors)
    for trace in fig0['data']:
        fig.add_trace(trace, row=panel_row, col=panel_col)
    if hide_legend:
        fig.update_traces(showlegend=False, row=panel_row, col=panel_col)
    fig.update_yaxes(range= [0, panel_ymax], row=panel_row, col=panel_col)
## Styling shared by all panels
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 zeroline=False, zerolinewidth=1, zerolinecolor='Black',
                 showgrid=False, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 zeroline=True, zerolinewidth=1, zerolinecolor='Black',
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_traces(marker_line_color= 'Black', marker_line_width=1, opacity=1)
fig.update_layout(legend_orientation='v', legend_title_text='Year', plot_bgcolor= 'white', height= 1200, width= 980,
                  title={'text': '<b>' + 'Responses by Continent and Country' + '</b>', 'x':0.5, 'y':.95,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
# Responses per age group and year, all countries together.
Table = TableAgg(Feat = ['Age Group'])
PD = dict(xFeat = 'Age Group', yFeat = 'Count', line_color = 'Black', ColorFeat = 'Year', line_width = 1,
Title = 'Responses by Age Group',
Lims = [0, 7e3], Colors = year_colors, titleY = 0.95, height = 500)
YearBarPlot(Table, PD)
# Same breakdown per country; plotted for the United States first ...
Table = TableAgg(Feat = ['Age Group', 'Country'])
PD = dict(xFeat = 'Age Group', yFeat = 'Count', line_color = 'Navy', ColorFeat = 'Year', line_width = 1,
Title = 'Responses by Age Group (United States)',
Lims = [0, 12e2], Colors = year_colors, titleY = 0.95, height = 500)
YearBarPlot(Table[Table['Country'] == 'United States'], PD)
# ... then for India, reusing PD with a different axis range and title.
PD['Lims'] = [0, 1600]
PD['Title'] = 'Responses by Age Group (India)'
YearBarPlot(Table[Table['Country'] == 'India'], PD)
# Responses per formal-education level and year, drawn as horizontal bars.
Table = TableAgg(Feat = ['FormalEducation'])
# The title now names the plotted feature (it previously repeated the
# "Age Group (United States)" title of the preceding chart).
PD = dict(xFeat = 'FormalEducation', yFeat = 'Count', line_color = 'Navy', ColorFeat = 'Year', line_width = 1,
          Title = 'Responses by Formal Education',
          Lims = [0, 12e3], Colors = year_colors, titleY = 0.95, height = 600)
YearBarPlot(Table, PD, xy = False)
Top = 5
Feat1 = 'Country'
Feat2 = 'FormalEducation'
Table = TableAgg(Feat = [Feat1, Feat2])
# Shorten the longest label so it fits in the legend.
Table[Feat2] = Table[Feat2].replace({'Some college/university study without earning a bachelor’s degree':'Some college'})
# Percentage of each Feat2 category within its (Feat1, Year) group, rounded to 2 decimals.
# One grouped transform replaces the nested loop over every country/year pair.
Table['Percentage'] = np.round(
    100 * (Table['Count'] / Table.groupby([Feat1, 'Year'])['Count'].transform('sum')), 2)
PD = dict(line_color = 'Black', ColorFeat = 'Year', line_width = 1, titleY = .90,
          Title = 'Responses by %s and %s for various years' % (Feat1, Feat2),
          Colors = None, height = 550)
# Keep only the first ``Top`` entries of the ranking stored in ``Ind``.
Table = Table.loc[Table[Feat1].isin(Ind[:Top])]
TwoFeaturePlot(Table, Feat1, Feat2, PD)
# Responses per job title for the years after 2017.
Top = 25
Table = TableAgg(Feat = ['CurrentJobTitle'])
# Note: this rebinds ``Ind`` — from here on it holds job titles, not countries.
Ind = Table.sort_values(by =['Count'], ascending = False)['CurrentJobTitle'].unique()[:Top]
# 'Year' holds string labels, so this is a string comparison that keeps '2018' and later.
Table = Table.loc[(Table['CurrentJobTitle'].isin(Ind)) & (Table['Year']> '2017')]
PD = dict(xFeat = 'CurrentJobTitle', yFeat = 'Count', line_color = 'Navy', ColorFeat = 'Year', line_width = 1.25,
Title = 'Responses by Current Job Title',
Lims = [0, 6e3], Colors = year_colors[1:], titleY = 0.95, height = 500)
YearBarPlot(Table, PD)
Table = TableAgg(Feat = ['CurrentSalary'], data = [Data19, Data20], Lables = ['2019', '2020'])
# Split each salary band at '-' into its lower and upper part.
Temp = Table['CurrentSalary'].str.split(pat = "-", expand=True)
Temp.columns = ['SalaryMin','SalaryMax']
# Strip the decoration characters. regex=False makes the match literal on every
# pandas version ('+' and '$' are regex metacharacters).
mylist = ['+',',','$','>']
for c in Temp.columns:
    for s in mylist:
        Temp[c] = Temp[c].str.replace(s, '', regex=False)
# SalaryMin is used below as the key that orders the salary bands.
Temp['SalaryMin'] = Temp['SalaryMin'].fillna(0)
Temp['SalaryMin'] = Temp['SalaryMin'].astype(int)*(1000)
Temp = pd.concat([Table, Temp], axis=1)
# Merge the two top-but-one bands and tidy the labels of the first and last band.
Temp.loc[Temp.CurrentSalary.isin(['300-400,000','400-500,000']), ['CurrentSalary','SalaryMin']] = '300-500,000', int(3e5)
Temp.loc[Temp.CurrentSalary == '$0-999', 'CurrentSalary'] ='0-999'
Temp.loc[Temp.CurrentSalary == '> $500,000', 'CurrentSalary'] ='500,000+'
Table = Temp.sort_values(['SalaryMin'])
del Temp, mylist
# The title now names the plotted feature (it previously said "Current Job Title").
PD = dict(xFeat = 'CurrentSalary', yFeat = 'Count', line_color = 'Navy', ColorFeat = 'Year', line_width = 1,
          Title = 'Responses by Current Salary',
          Lims = [0, 1600], Colors = year_colors[-2:], titleY = 0.95, height = 500)
YearBarPlot(Table, PD)
def mysplit(Text, S):
    """Return the part of ``Text`` that follows the separator ``S``.

    ``S`` must occur exactly once in ``Text``; otherwise the unpacking
    raises ValueError.
    """
    pieces = Text.split(S)
    _before, after = pieces
    return after
def mysplit2(Text):
    """Return ``Text`` up to (not including) the first ' (' — the whole text if there is none."""
    return Text.partition(' (')[0]
def List_Break(mylist, n = 4):
    """Map each label to a copy wrapped with '<br>' after every ``n`` words.

    mylist : iterable of strings.
    n      : maximum number of words per line.

    Returns a dict ``{original label: wrapped label}``. Labels of at most
    ``n`` words come back as their words joined by single spaces.

    Two defects of the earlier loop are fixed: it reused ``n`` as its inner
    loop variable (changing the wrap width for every following label), and
    it appended the whole remaining tail at each break, so labels longer
    than ``2 * n`` words repeated their last words.
    """
    Out = {}
    for label in mylist:
        words = label.split()
        lines = [' '.join(words[start:start + n]) for start in range(0, len(words), n)]
        Out[label] = '<br>'.join(lines)
    return Out
# Work activities (multi-select question), 2018-2020.
# (A ``data`` list covering all four years used to be assigned here and was
# overwritten before any use; only the effective assignment is kept.)
S = 'Select any activities that make up an important part of your role at work: (Select all that apply) - Selected Choice - '
Col = 'Activities'
data = [Data18, Data19, Data20]
Labels = ['2018', '2019', '2020']
Table = pd.DataFrame(columns = [Col, 'Count', 'Year'])
for df, label in zip(data, Labels):
    # Keep this question's answer columns and label each by its option text.
    Temp0 = df[Search_df(df,S)]
    Temp0.columns = [mysplit(x, S) for x in Temp0.columns]
    # Non-null answers per option, reshaped to one row per option.
    Temp0 = Temp0.agg({'count'}).T.reset_index(drop = False)
    Temp0.columns = [Col, 'Count']
    Temp0['Year'] = label
    Table = pd.concat([Table, Temp0])
del Temp0
# Wrap long option texts so they fit next to the horizontal bars.
Table[Col] = Table[Col].replace(List_Break(Table[Col].unique().tolist()))
PD = dict(xFeat = Col, yFeat = 'Count', line_color = 'Navy', ColorFeat = 'Year', line_width = 1,
          Title = 'Responses by %s' % Col,
          Lims = [0, 1e4], Colors = year_colors[1:], titleY = 0.95, height = 600)
YearBarPlot(Table, PD, xy = False)
# Favourite media sources (multi-select question), 2019-2020.
S = 'Who/what are your favorite media sources that report on data science topics? (Select all that apply) - Selected Choice - '
Col = 'Media Sources'
data = [Data19, Data20]
Labels = ['2019', '2020']
Table = pd.DataFrame(columns = [Col, 'Count', 'Year'])
for df, label in zip(data, Labels):
    # Keep this question's answer columns and label each by its option text.
    Temp0 = df[Search_df(df, S)]
    Temp0.columns = [mysplit(x, S) for x in Temp0.columns]
    # Non-null answers per option, reshaped to one row per option.
    Temp0 = Temp0.agg({'count'}).T.reset_index(drop = False)
    Temp0.columns = [Col, 'Count']
    Temp0['Year'] = label
    Table = pd.concat([Table, Temp0])
del Temp0
# Drop the parenthesised explanation from each option text.
# (Label wrapping via List_Break is left disabled for this chart.)
Table[Col] = Table[Col].apply(mysplit2)
PD = {'xFeat': Col, 'yFeat': 'Count', 'line_color': 'Navy', 'ColorFeat': 'Year', 'line_width': 1,
      'Title': 'Responses by %s' % Col,
      'Lims': [0, 12e3], 'Colors': year_colors[-2:], 'titleY': 0.95, 'height': 600}
YearBarPlot(Table, PD, xy = False)
# Data science course platforms (multi-select question), 2019-2020.
S = 'On which platforms have you begun or completed data science courses? (Select all that apply) - Selected Choice - '
Col = 'Data Science Courses'
data = [Data19, Data20]
Labels = ['2019', '2020']
Table = pd.DataFrame(columns = [Col, 'Count', 'Year'])
for df, label in zip(data, Labels):
    # Keep this question's answer columns and label each by its option text.
    Temp0 = df[Search_df(df, S)]
    Temp0.columns = [mysplit(x, S) for x in Temp0.columns]
    # Non-null answers per option, reshaped to one row per option.
    Temp0 = Temp0.agg({'count'}).T.reset_index(drop = False)
    Temp0.columns = [Col, 'Count']
    Temp0['Year'] = label
    Table = pd.concat([Table, Temp0])
del Temp0
# Drop the parenthesised explanation from each option text.
# (Label wrapping via List_Break is left disabled for this chart.)
Table[Col] = Table[Col].apply(mysplit2)
PD = {'xFeat': Col, 'yFeat': 'Count', 'line_color': 'Navy', 'ColorFeat': 'Year', 'line_width': 1,
      'Title': 'Responses by %s' % Col,
      'Lims': [0, 1e4], 'Colors': year_colors[-2:], 'titleY': 0.95, 'height': 600}
YearBarPlot(Table, PD, xy = False)
# Integrated development environments (multi-select question), 2019-2020.
S = """Which of the following integrated development environments (IDE's) do you use on a regular basis? (Select all that apply) - Selected Choice - """
Col = """IDE's"""
data = [Data19, Data20]
Labels = ['2019', '2020']
Table = pd.DataFrame(columns = [Col, 'Count', 'Year'])
for df, label in zip(data, Labels):
    # Keep this question's answer columns and label each by its option text.
    Temp0 = df[Search_df(df, S)]
    Temp0.columns = [mysplit(x, S) for x in Temp0.columns]
    # Non-null answers per option, reshaped to one row per option.
    Temp0 = Temp0.agg({'count'}).T.reset_index(drop = False)
    Temp0.columns = [Col, 'Count']
    Temp0['Year'] = label
    Table = pd.concat([Table, Temp0])
del Temp0
# Drop the parenthesised explanation from each option text.
# (Label wrapping via List_Break is left disabled for this chart.)
Table[Col] = Table[Col].apply(mysplit2)
PD = {'xFeat': Col, 'yFeat': 'Count', 'line_color': 'Navy', 'ColorFeat': 'Year', 'line_width': 1,
      'Title': 'Responses by %s' % Col,
      'Lims': [0, 12e3], 'Colors': year_colors[-2:], 'titleY': 0.95, 'height': 600}
YearBarPlot(Table, PD, xy = False)
# Hosted notebook products (multi-select question), 2019-2020.
S = 'Which of the following hosted notebook products do you use on a regular basis? (Select all that apply) - Selected Choice - '
Col = 'Notebook Host'
data = [Data19, Data20]
Labels = ['2019', '2020']
Table = pd.DataFrame(columns = [Col, 'Count', 'Year'])
for df, label in zip(data, Labels):
    # Keep this question's answer columns and label each by its option text.
    Temp0 = df[Search_df(df, S)]
    Temp0.columns = [mysplit(x, S) for x in Temp0.columns]
    # Non-null answers per option, reshaped to one row per option.
    Temp0 = Temp0.agg({'count'}).T.reset_index(drop = False)
    Temp0.columns = [Col, 'Count']
    Temp0['Year'] = label
    Table = pd.concat([Table, Temp0])
del Temp0
# Drop the parenthesised explanation from each option text.
# (Label wrapping via List_Break is left disabled for this chart.)
Table[Col] = Table[Col].apply(mysplit2)
PD = {'xFeat': Col, 'yFeat': 'Count', 'line_color': 'Navy', 'ColorFeat': 'Year', 'line_width': 1,
      'Title': 'Responses by %s' % Col,
      'Lims': [0, 5e3], 'Colors': year_colors[-2:], 'titleY': 0.95, 'height': 600}
YearBarPlot(Table, PD, xy = False)
# Programming languages (multi-select question), 2019-2020.
S = 'What programming languages do you use on a regular basis? (Select all that apply) - Selected Choice - '
Col = 'Programming Languages'
data = [Data19, Data20]
Labels = ['2019', '2020']
Table = pd.DataFrame(columns = [Col, 'Count', 'Year'])
for df, label in zip(data, Labels):
    # Keep this question's answer columns and label each by its option text.
    Temp0 = df[Search_df(df, S)]
    Temp0.columns = [mysplit(x, S) for x in Temp0.columns]
    # Non-null answers per option, reshaped to one row per option.
    Temp0 = Temp0.agg({'count'}).T.reset_index(drop = False)
    Temp0.columns = [Col, 'Count']
    Temp0['Year'] = label
    Table = pd.concat([Table, Temp0])
del Temp0
# Drop the parenthesised explanation from each option text.
# (Label wrapping via List_Break is left disabled for this chart.)
Table[Col] = Table[Col].apply(mysplit2)
PD = {'xFeat': Col, 'yFeat': 'Count', 'line_color': 'Navy', 'ColorFeat': 'Year', 'line_width': 1,
      'Title': 'Responses by %s' % Col,
      'Lims': [0, 14e3], 'Colors': year_colors[-2:], 'titleY': 0.95, 'height': 600}
YearBarPlot(Table, PD, xy = False)
# Data visualization libraries (multi-select question), 2019-2020.
S = 'What data visualization libraries or tools do you use on a regular basis? (Select all that apply) - Selected Choice -'
Col = 'Visualization Libraries'
data = [Data19, Data20]
Labels = ['2019', '2020']
Table = pd.DataFrame(columns = [Col, 'Count', 'Year'])
for df, label in zip(data, Labels):
    # Keep this question's answer columns and label each by its option text.
    Temp0 = df[Search_df(df,S)]
    Temp0.columns = [mysplit(x, S) for x in Temp0.columns]
    # Non-null answers per option, reshaped to one row per option.
    Temp0 = Temp0.agg({'count'}).T.reset_index(drop = False)
    Temp0.columns = [Col, 'Count']
    Temp0['Year'] = label
    Table = pd.concat([Table, Temp0])
del Temp0
# S ends at the dash (no trailing space), so every option text starts with the
# space that follows it; strip the labels so they display cleanly, then drop
# the parenthesised explanation.
Table[Col] = Table[Col].apply(lambda x: mysplit2(x.strip()).strip())
PD = dict(xFeat = Col, yFeat = 'Count', line_color = 'Navy', ColorFeat = 'Year', line_width = 1,
          Title = 'Responses by %s' % Col,
          Lims = [0, 12e3], Colors = year_colors[-2:], titleY = 0.95, height = 600)
YearBarPlot(Table, PD, xy = False)
# Specialized hardware (multi-select question), 2019-2020.
S = 'Which types of specialized hardware do you use on a regular basis? (Select all that apply) - Selected Choice -'
Col = 'Specialized Hardwares'
data = [Data19, Data20]
Labels = ['2019', '2020']
Table = pd.DataFrame(columns = [Col, 'Count', 'Year'])
for df, label in zip(data, Labels):
    # Keep this question's answer columns and label each by its option text.
    Temp0 = df[Search_df(df,S)]
    Temp0.columns = [mysplit(x, S) for x in Temp0.columns]
    # Non-null answers per option, reshaped to one row per option.
    Temp0 = Temp0.agg({'count'}).T.reset_index(drop = False)
    Temp0.columns = [Col, 'Count']
    Temp0['Year'] = label
    Table = pd.concat([Table, Temp0])
del Temp0
# S ends at the dash (no trailing space), so every option text starts with the
# space that follows it; strip the labels so they display cleanly, then drop
# the parenthesised explanation.
Table[Col] = Table[Col].apply(lambda x: mysplit2(x.strip()).strip())
PD = dict(xFeat = Col, yFeat = 'Count', line_color = 'Navy', ColorFeat = 'Year', line_width = 1,
          Title = 'Responses by %s' % Col,
          Lims = [0, 12e3], Colors = year_colors[-2:], titleY = 0.95, height = 400)
YearBarPlot(Table, PD, xy = False)
# ML algorithms (multi-select question), 2019-2020.
S = 'Which of the following ML algorithms do you use on a regular basis? (Select all that apply): - Selected Choice -'
Col = 'ML Algorithms'
data = [Data19, Data20]
Labels = ['2019', '2020']
Table = pd.DataFrame(columns = [Col, 'Count', 'Year'])
for df, label in zip(data, Labels):
    # Keep this question's answer columns and label each by its option text.
    Temp0 = df[Search_df(df,S)]
    Temp0.columns = [mysplit(x, S) for x in Temp0.columns]
    # Non-null answers per option, reshaped to one row per option.
    Temp0 = Temp0.agg({'count'}).T.reset_index(drop = False)
    Temp0.columns = [Col, 'Count']
    Temp0['Year'] = label
    Table = pd.concat([Table, Temp0])
del Temp0
# S ends at the dash (no trailing space), so every option text starts with the
# space that follows it; strip the labels so they display cleanly, then drop
# the parenthesised explanation.
Table[Col] = Table[Col].apply(lambda x: mysplit2(x.strip()).strip())
PD = dict(xFeat = Col, yFeat = 'Count', line_color = 'Navy', ColorFeat = 'Year', line_width = 1,
          Title = 'Responses by %s' % Col,
          Lims = [0, 12e3], Colors = year_colors[-2:], titleY = 0.95, height = 600)
YearBarPlot(Table, PD, xy = False)
# Categories of ML tools (multi-select question), 2019-2020.
S = 'Which categories of ML tools do you use on a regular basis? (Select all that apply) - Selected Choice - '
Col = 'ML Tools'
data = [Data19, Data20]
Labels = ['2019', '2020']
Table = pd.DataFrame(columns = [Col, 'Count', 'Year'])
for df, label in zip(data, Labels):
    # Keep this question's answer columns and label each by its option text.
    Temp0 = df[Search_df(df, S)]
    Temp0.columns = [mysplit(x, S) for x in Temp0.columns]
    # Non-null answers per option, reshaped to one row per option.
    Temp0 = Temp0.agg({'count'}).T.reset_index(drop = False)
    Temp0.columns = [Col, 'Count']
    Temp0['Year'] = label
    Table = pd.concat([Table, Temp0])
del Temp0
# Drop the parenthesised explanation, then wrap the remaining long labels.
Table[Col] = Table[Col].apply(mysplit2)
wrapped_labels = List_Break(Table[Col].unique().tolist())
Table[Col] = Table[Col].replace(wrapped_labels)
del wrapped_labels
PD = {'xFeat': Col, 'yFeat': 'Count', 'line_color': 'Navy', 'ColorFeat': 'Year', 'line_width': 1,
      'Title': 'Responses by %s' % Col,
      'Lims': [0, 8e3], 'Colors': year_colors[-2:], 'titleY': 0.95, 'height': 400}
YearBarPlot(Table, PD, xy = False)
# Bar chart of per-option response counts for the computer-vision-methods question,
# for the frames labelled 2019 and 2020.
S = 'Which categories of computer vision methods do you use on a regular basis? (Select all that apply) - Selected Choice - '
Col = 'Computer Vision Methods'
data = [Data19, Data20]
Labels = ['2019', '2020']

# Build one (option, count, year) frame per survey and concatenate once:
# repeatedly pd.concat-ing onto an empty DataFrame re-copies the accumulated
# rows on every pass and can leave 'Count' with object dtype.
_frames = []
for df, label in zip(data, Labels):
    Temp0 = df[Search_df(df, S)]  # presumably the answer columns of question S
    Temp0.columns = [mysplit(x, S) for x in Temp0.columns]  # presumably strips the question text
    Temp0 = Temp0.count().reset_index()  # non-null entries per column
    Temp0.columns = [Col, 'Count']
    Temp0['Year'] = label
    _frames.append(Temp0)
    del Temp0
Table = pd.concat(_frames)
del _frames

Table[Col] = Table[Col].apply(mysplit2)
# Label wrapping via List_Break is left disabled for this chart:
# Table[Col] = Table[Col].replace(List_Break(Table[Col].unique().tolist()))

# Settings passed to YearBarPlot; the last two entries of year_colors colour the two years.
PD = dict(
    xFeat=Col,
    yFeat='Count',
    line_color='Navy',
    ColorFeat='Year',
    line_width=1,
    Title=f'Responses by {Col}',
    Lims=[0, 4e3],
    Colors=year_colors[-2:],
    titleY=0.95,
    height=400,
)
YearBarPlot(Table, PD, xy=False)
# Bar chart of per-option response counts for the NLP-methods question,
# for the frames labelled 2019 and 2020.
S = 'Which of the following natural language processing (NLP) methods do you use on a regular basis? (Select all that apply) - Selected Choice - '
Col = 'Natural Language Processing (NLP)'
data = [Data19, Data20]
Labels = ['2019', '2020']

# Build one (option, count, year) frame per survey and concatenate once:
# repeatedly pd.concat-ing onto an empty DataFrame re-copies the accumulated
# rows on every pass and can leave 'Count' with object dtype.
_frames = []
for df, label in zip(data, Labels):
    Temp0 = df[Search_df(df, S)]  # presumably the answer columns of question S
    Temp0.columns = [mysplit(x, S) for x in Temp0.columns]  # presumably strips the question text
    Temp0 = Temp0.count().reset_index()  # non-null entries per column
    Temp0.columns = [Col, 'Count']
    Temp0['Year'] = label
    _frames.append(Temp0)
    del Temp0
Table = pd.concat(_frames)
del _frames

Table[Col] = Table[Col].apply(mysplit2)
# Label wrapping via List_Break is left disabled for this chart:
# Table[Col] = Table[Col].replace(List_Break(Table[Col].unique().tolist()))

# Settings passed to YearBarPlot; the last two entries of year_colors colour the two years.
PD = dict(
    xFeat=Col,
    yFeat='Count',
    line_color='Navy',
    ColorFeat='Year',
    line_width=1,
    Title=f'Responses by {Col}',
    Lims=[0, 2500],
    Colors=year_colors[-2:],
    titleY=0.95,
    height=500,
)
YearBarPlot(Table, PD, xy=False)
# Bar chart of per-option response counts for the ML-frameworks question,
# for the frames labelled 2019 and 2020.
S = 'Which of the following machine learning frameworks do you use on a regular basis? (Select all that apply) - Selected Choice -'
Col = 'Machine Learning Frameworks'
data = [Data19, Data20]
Labels = ['2019', '2020']

# Build one (option, count, year) frame per survey and concatenate once:
# repeatedly pd.concat-ing onto an empty DataFrame re-copies the accumulated
# rows on every pass and can leave 'Count' with object dtype.
_frames = []
for df, label in zip(data, Labels):
    Temp0 = df[Search_df(df, S)]  # presumably the answer columns of question S
    Temp0.columns = [mysplit(x, S) for x in Temp0.columns]  # presumably strips the question text
    Temp0 = Temp0.count().reset_index()  # non-null entries per column
    Temp0.columns = [Col, 'Count']
    Temp0['Year'] = label
    _frames.append(Temp0)
    del Temp0
Table = pd.concat(_frames)
del _frames

Table[Col] = Table[Col].apply(mysplit2)
# Label wrapping via List_Break is left disabled for this chart:
# Table[Col] = Table[Col].replace(List_Break(Table[Col].unique().tolist()))

# Settings passed to YearBarPlot; the last two entries of year_colors colour the two years.
PD = dict(
    xFeat=Col,
    yFeat='Count',
    line_color='Navy',
    ColorFeat='Year',
    line_width=1,
    Title=f'Responses by {Col}',
    Lims=[0, 10e3],
    Colors=year_colors[-2:],
    titleY=0.95,
    height=600,
)
YearBarPlot(Table, PD, xy=False)
# Bar chart of per-option response counts for the cloud-computing-platforms question,
# for the frames labelled 2019 and 2020.
S = 'Which of the following cloud computing platforms do you use on a regular basis? (Select all that apply) - Selected Choice -'
Col = 'Cloud Computing Platforms'
data = [Data19, Data20]
Labels = ['2019', '2020']

# Build one (option, count, year) frame per survey and concatenate once:
# repeatedly pd.concat-ing onto an empty DataFrame re-copies the accumulated
# rows on every pass and can leave 'Count' with object dtype.
_frames = []
for df, label in zip(data, Labels):
    Temp0 = df[Search_df(df, S)]  # presumably the answer columns of question S
    Temp0.columns = [mysplit(x, S) for x in Temp0.columns]  # presumably strips the question text
    Temp0 = Temp0.count().reset_index()  # non-null entries per column
    Temp0.columns = [Col, 'Count']
    Temp0['Year'] = label
    _frames.append(Temp0)
    del Temp0
Table = pd.concat(_frames)
del _frames

Table[Col] = Table[Col].apply(mysplit2)
# Label wrapping via List_Break is left disabled for this chart:
# Table[Col] = Table[Col].replace(List_Break(Table[Col].unique().tolist()))

# Settings passed to YearBarPlot; the last two entries of year_colors colour the two years.
PD = dict(
    xFeat=Col,
    yFeat='Count',
    line_color='Navy',
    ColorFeat='Year',
    line_width=1,
    Title=f'Responses by {Col}',
    Lims=[0, 3e3],
    Colors=year_colors[-2:],
    titleY=0.95,
    height=600,
)
YearBarPlot(Table, PD, xy=False)
# Bar chart of per-option response counts for the cloud-computing-products question,
# for the frames labelled 2019 and 2020.
S = 'Which specific cloud computing products do you use on a regular basis? (Select all that apply) - Selected Choice -'
Col = 'Cloud Computing Products'
data = [Data19, Data20]
Labels = ['2019', '2020']

# Build one (option, count, year) frame per survey and concatenate once:
# repeatedly pd.concat-ing onto an empty DataFrame re-copies the accumulated
# rows on every pass and can leave 'Count' with object dtype.
_frames = []
for df, label in zip(data, Labels):
    Temp0 = df[Search_df(df, S)]  # presumably the answer columns of question S
    Temp0.columns = [mysplit(x, S) for x in Temp0.columns]  # presumably strips the question text
    Temp0 = Temp0.count().reset_index()  # non-null entries per column
    Temp0.columns = [Col, 'Count']
    Temp0['Year'] = label
    _frames.append(Temp0)
    del Temp0
Table = pd.concat(_frames)
del _frames

Table[Col] = Table[Col].apply(mysplit2)
# Label wrapping via List_Break is left disabled for this chart:
# Table[Col] = Table[Col].replace(List_Break(Table[Col].unique().tolist()))

# Settings passed to YearBarPlot; the last two entries of year_colors colour the two years.
PD = dict(
    xFeat=Col,
    yFeat='Count',
    line_color='Navy',
    ColorFeat='Year',
    line_width=1,
    Title=f'Responses by {Col}',
    Lims=[0, 35e2],
    Colors=year_colors[-2:],
    titleY=0.95,
    height=600,
)
YearBarPlot(Table, PD, xy=False)
# Bar chart of per-option response counts for the big-data / analytics-products question,
# for the frames labelled 2019 and 2020.
S = 'Which specific big data / analytics products do you use on a regular basis? (Select all that apply) - Selected Choice -'
Col = 'Big Data / Analytics Products'
data = [Data19, Data20]
Labels = ['2019', '2020']

# Build one (option, count, year) frame per survey and concatenate once:
# repeatedly pd.concat-ing onto an empty DataFrame re-copies the accumulated
# rows on every pass and can leave 'Count' with object dtype.
_frames = []
for df, label in zip(data, Labels):
    Temp0 = df[Search_df(df, S)]  # presumably the answer columns of question S
    Temp0.columns = [mysplit(x, S) for x in Temp0.columns]  # presumably strips the question text
    Temp0 = Temp0.count().reset_index()  # non-null entries per column
    Temp0.columns = [Col, 'Count']
    Temp0['Year'] = label
    _frames.append(Temp0)
    del Temp0
Table = pd.concat(_frames)
del _frames

Table[Col] = Table[Col].apply(mysplit2)
# Label wrapping via List_Break is left disabled for this chart:
# Table[Col] = Table[Col].replace(List_Break(Table[Col].unique().tolist()))

# Settings passed to YearBarPlot; the last two entries of year_colors colour the two years.
PD = dict(
    xFeat=Col,
    yFeat='Count',
    line_color='Navy',
    ColorFeat='Year',
    line_width=1,
    Title=f'Responses by {Col}',
    Lims=[0, 4500],
    Colors=year_colors[-2:],
    titleY=0.95,
    height=600,
)
YearBarPlot(Table, PD, xy=False)
# Bar chart of per-option response counts for the machine-learning-products question,
# for the frames labelled 2019 and 2020.
S = 'Which of the following machine learning products do you use on a regular basis? (Select all that apply) - Selected Choice -'
Col = 'Machine Learning Products'
data = [Data19, Data20]
Labels = ['2019', '2020']

# Build one (option, count, year) frame per survey and concatenate once:
# repeatedly pd.concat-ing onto an empty DataFrame re-copies the accumulated
# rows on every pass and can leave 'Count' with object dtype.
_frames = []
for df, label in zip(data, Labels):
    Temp0 = df[Search_df(df, S)]  # presumably the answer columns of question S
    Temp0.columns = [mysplit(x, S) for x in Temp0.columns]  # presumably strips the question text
    Temp0 = Temp0.count().reset_index()  # non-null entries per column
    Temp0.columns = [Col, 'Count']
    Temp0['Year'] = label
    _frames.append(Temp0)
    del Temp0
Table = pd.concat(_frames)
del _frames

Table[Col] = Table[Col].apply(mysplit2)
# Label wrapping via List_Break is left disabled for this chart:
# Table[Col] = Table[Col].replace(List_Break(Table[Col].unique().tolist()))

# Settings passed to YearBarPlot; the last two entries of year_colors colour the two years.
PD = dict(
    xFeat=Col,
    yFeat='Count',
    line_color='Navy',
    ColorFeat='Year',
    line_width=1,
    Title=f'Responses by {Col}',
    Lims=[0, 5e3],
    Colors=year_colors[-2:],
    titleY=0.95,
    height=600,
)
YearBarPlot(Table, PD, xy=False)
# Bar chart of per-option response counts for the automated-ML-tools question,
# for the frames labelled 2019 and 2020.
S = 'Which automated machine learning tools (or partial AutoML tools) do you use on a regular basis? (Select all that apply) - Selected Choice -'
Col = 'Automated Machine Learning Tools'
data = [Data19, Data20]
Labels = ['2019', '2020']

# Build one (option, count, year) frame per survey and concatenate once:
# repeatedly pd.concat-ing onto an empty DataFrame re-copies the accumulated
# rows on every pass and can leave 'Count' with object dtype.
_frames = []
for df, label in zip(data, Labels):
    Temp0 = df[Search_df(df, S)]  # presumably the answer columns of question S
    Temp0.columns = [mysplit(x, S) for x in Temp0.columns]  # presumably strips the question text
    Temp0 = Temp0.count().reset_index()  # non-null entries per column
    Temp0.columns = [Col, 'Count']
    Temp0['Year'] = label
    _frames.append(Temp0)
    del Temp0
Table = pd.concat(_frames)
del _frames

Table[Col] = Table[Col].apply(mysplit2)
# Label wrapping via List_Break is left disabled for this chart:
# Table[Col] = Table[Col].replace(List_Break(Table[Col].unique().tolist()))

# Settings passed to YearBarPlot; the last two entries of year_colors colour the two years.
PD = dict(
    xFeat=Col,
    yFeat='Count',
    line_color='Navy',
    ColorFeat='Year',
    line_width=1,
    Title=f'Responses by {Col}',
    Lims=[0, 6e3],
    Colors=year_colors[-2:],
    titleY=0.95,
    height=600,
)
YearBarPlot(Table, PD, xy=False)
# Bar chart of per-option response counts for the relational-database-products question,
# for the frames labelled 2019 and 2020.
S = 'Which of the following relational database products do you use on a regular basis? (Select all that apply) - Selected Choice -'
# The label must describe this question: it names the x column and appears in the chart title.
Col = 'Relational Database Products'
data = [Data19, Data20]
Labels = ['2019', '2020']

# Build one (option, count, year) frame per survey and concatenate once:
# repeatedly pd.concat-ing onto an empty DataFrame re-copies the accumulated
# rows on every pass and can leave 'Count' with object dtype.
_frames = []
for df, label in zip(data, Labels):
    Temp0 = df[Search_df(df, S)]  # presumably the answer columns of question S
    Temp0.columns = [mysplit(x, S) for x in Temp0.columns]  # presumably strips the question text
    Temp0 = Temp0.count().reset_index()  # non-null entries per column
    Temp0.columns = [Col, 'Count']
    Temp0['Year'] = label
    _frames.append(Temp0)
    del Temp0
Table = pd.concat(_frames)
del _frames

Table[Col] = Table[Col].apply(mysplit2)
# Label wrapping via List_Break is left disabled for this chart:
# Table[Col] = Table[Col].replace(List_Break(Table[Col].unique().tolist()))

# Settings passed to YearBarPlot; the last two entries of year_colors colour the two years.
PD = dict(
    xFeat=Col,
    yFeat='Count',
    line_color='Navy',
    ColorFeat='Year',
    line_width=1,
    Title=f'Responses by {Col}',
    Lims=[0, 3500],
    Colors=year_colors[-2:],
    titleY=0.95,
    height=600,
)
YearBarPlot(Table, PD, xy=False)